*Settings
clear all
*set matsize 11000, permanently
*ssc install coefplot
*ssc install spgmm
*ssc install spweight
*ssc install binscatter

*Change this to your system name
local user "awcassidy1"
set more off, permanently
cd "C:\Users\awcassidy1\Dropbox\jmp_new\cleaned_data"

local dir1 "C:\Users/`user'/Dropbox\jmp_new\tables"
local dir2 "C:\Users/`user'/Dropbox\jmp_new\figs"


use "combined_6.dta", clear

la var price "2nd Sale Price"

*make compliance group variable.
egen comply_group=group(condo comply_deadline)

local group1 negleakavg atticrvalue ductravg ductsystem1type rsavg

local group2 eeravg negsysageavg waterheatertanktype sizeavg system1airhandlertype ///
	system1locationairhandler

local group3 progtherm waterheaterfueltype furnacefueltype

local allcontrols sqfttotal num_beds_total num_baths_total pool ///
	 acres yearbuiltaudit 
	
	
*conversion to $/mcf
gen austinelec_equiv1 =  austinelec_adj1*2.930711
gen austinelec_equiv2 =  austinelec_adj2*2.930711
	
gen lfuel_charge_ht = texasgas_adj1 if furnacefueltype==1
gen fuel_charge_ht = texasgas_adj2 if furnacefueltype==1
replace lfuel_charge_ht = austinelec_equiv1 if furnacefueltype==0
replace fuel_charge_ht = austinelec_equiv2 if furnacefueltype==0




gen lfuel_charge_wh = texasgas_adj1 if waterheaterfueltype==1
gen fuel_charge_wh = texasgas_adj2 if waterheaterfueltype==1
replace lfuel_charge_wh = austinelec_equiv1 if waterheaterfueltype==0
replace fuel_charge_wh = austinelec_equiv1 if waterheaterfueltype==0

gen fuel_charge_diff_ht=fuel_charge_ht-lfuel_charge_ht
gen fuel_charge_diff_wh=fuel_charge_wh-lfuel_charge_wh

local auditvars `group1' `group2' `group3'

foreach v of varlist `group1' `group2' `group3' `allcontrols' fuel_charge_diff* {
	sum `v' if !missing(pricediff)
	gen `v'_st=(`v'-`r(mean)')/`r(sd)'
	la var `v'_st "`: var label `v''"
	sum `v'_st
	
	*make condo vars
	gen condo_`v'=`v'*condo
	}
destring zip, replace

sort mlsid relsalenum
xtset mlsid relsalenum


gen closeprice1=l.price if !missing(pricediff)
gen closeprice2=price if !missing(pricediff)


egen home_type_cat=group(home_type)

*check to see if audit after or before list date.

count if auditd>listd & !missing(listd) & !missing(auditd) & !missing(pricediff)
count if auditd<listd & !missing(listd) & !missing(auditd) & !missing(pricediff)
count if auditd==listd & !missing(listd) & !missing(auditd) & !missing(pricediff)

*make audit before listing variable.
gen audit_before_list=0 if  !missing(listd) & !missing(auditd) & !missing(pricediff)
replace audit_before_list=1 if auditd<listd & !missing(listd) & !missing(auditd) & !missing(pricediff)


egen yearbuiltgroup=cut(yearbuiltaudit), at(0, 1915(5)2020) label icodes

*now make the yearbuiltgroup by sale year pair triples
egen ybctriplet=group(yearbuiltgroup saleyear lsaleyear)

*these codes start at 0
*Make them start at 1 so that I can put 1915 as the first group.
replace yearbuiltgroup=yearbuiltgroup+1


*********************************************************************************
*Make heat variables
cap drop ht* valid* toomany*
gen ht_electric=0 if regexm(heating, "Electric")>0 | regexm(heating, "Gas")>0 |  regexm(heating, "Pump")>0
replace ht_electric=1 if regexm(heating, "Electric")>0 |  regexm(heating, "Pump")>0
gen ht_nat_gas=0 if regexm(heating, "Electric")>0 | regexm(heating, "Gas")>0  |  regexm(heating, "Pump")>0
replace ht_nat_gas=1 if regexm(heating, "Gas")>0

replace ht_electric=1 if furnacefueltype==0 & missing(ht_electric)
replace ht_nat_gas=1 if furnacefueltype==1 & missing(ht_nat_gas)


*valid means they reported only one of nat gas or electric.
gen valid_ht=0
replace valid_ht=1 if ht_electric+ht_nat_gas==1


gen toomany_ht=0
replace toomany_ht=1 if ht_electric+ht_nat_gas>1 & !missing(ht_electric) & !missing(ht_nat_gas)

********************************************************************************
*older homes sample.
********************************************************************************

*condos with comply deadline of 2011 vs other homes with comply deadline of 2009
gen sample_olderhomes = 0
replace sample_olderhomes=1 if (condo==1 & yearbuiltaudit<2001) | (condo==0 & yearbuiltaudit<1999)

********************************************************************************
*make other variables
********************************************************************************

*fuel charge of heater
foreach w in sale {
	gen fc_ht_`w' = texasgas_adj if ht_nat_gas==1 & ht_elec!=1
	replace fc_ht_`w' = austinelec_adj if ht_elec==1 & ht_nat_gas!=1
	}
	
*fuel charge of water heater
foreach w in sale {
	gen fc_wh_`w' = texasgas_adj if waterheaterfueltype==1
	replace fc_wh_`w' = austinelec_adj if waterheaterfueltype==0
	}
	
cap drop post_negleakavg

rename system1locationairhandler ah_location
rename system1airhandlertype ah_type

local group2 negleakavg atticrvalue ductravg ductsystem1type rsavg ///
	eeravg negsysageavg waterheatertanktype sizeavg ah_type ///
	ah_location ///
	 progtherm waterheaterfueltype furnacefueltype fc_ht_sale fc_wh_sale
	 
foreach v of local group2 {
	gen post_`v'=post*`v' if !missing(`v')
	}


egen relsalenum_group=group(relsalenum)
xtset mlsid salenum

egen post_yearbuilt_cat=group(yearbuiltcat post)

egen sy_yearbuilt_cat=group(yearbuiltcat saleyear)

egen sy_condo=group(saleyear condo)

gen hy=hofd(saled)

*sample stuff
gen in_sample=1 if (relsalenum==0 & l.relsalenum==-1 & (saleyear>=2005 & saleyear<=2015) & (l.saleyear>=2005 & l.saleyear<=2015)) | ///
	(relsalenum==-1 & f.relsalenum==0 & (saleyear>=2005 & saleyear<=2015) & (f.saleyear>=2005 & f.saleyear<=2015))
egen id_in_sample=sum(in_sample), by(mlsid)


tab id_in_sample
replace id_in_sample=id_in_sample/2

gen f_post_negleakavg=f.post_negleakavg
replace f_post_negleakavg = post_negleakavg if post_negleakavg>0 & !missing(post_negleakavg)

gen month_of_sample=mofd(saled)

	
*now make a new saleyear measure that starts in june
*year jj is year_june to july
cap drop year_jj_*
gen year_jj=.

la var year_jj "Year ending may 31 of current year"
forval s=2000/2019 {
	local l=`s'-1
	gen year_jj_`s'=((saleyear==`s' & salemonth<6) |  (saleyear==`l' & salemonth>=6))
	la var year_jj_`s' "`l'-`s'"
	replace year_jj=`s' if  (saleyear==`s' & salemonth<6) |  (saleyear==`l' & salemonth>=6) 
	}

*make year jj of comply deadline.
gen comply_year_jj = .
la var comply_year_jj "Year immediately following comply deadline, where year starts june 1 of prev year and ending june 1 of current year"

forval s=2009/2019 {
	replace comply_year_jj=`s' if yofd(comply_deadline) ==`s'
	}

foreach v of varlist atticrvalue negleakavg negsysageavg ///
	ductravg eeravg sizeavg ///
		{
		egen `v'_lowhigh=pctile(eeravg), p(50) by(year_jj)
		
		gen h_`v' = 0 if !missing(`v')
		replace h_`v'=1 if !missing(`v') & `v'>`v'_lowhigh
		gen l_`v' = 1 if h_`v'==0
		replace l_`v' =0 if h_`v'==1
		gen pctile_`v' = .
		forval s=2000/2019 {
			xtile pctile_`v'_`s' = `v' if year_jj==`s', nq(99)
			replace pctile_`v' = pctile_`v'_`s' if missing(pctile_`v')
			drop pctile_`v'_`s'
		}
	la var pctile_`v' "`:var label `v'' percentile within year"
	la var h_`v' "Higher than median `v' for year"
	la var l_`v' "Lower than median `v' for year"
	}
			
format comply_deadline %td
format saled %td

xtset mlsid relsalenum
save "C:\Users\awcassidy1\Dropbox\jmp_new\cleaned_data/estimation_temp.dta", replace
